# File: R/FastQC analysis.R

# Defines functions: bquality, adapter, New_FastQCresultsForAll, New_FastQCresults, FastQCresultsForAll, FastQCresults

##################################################################################################################################################################
######################################################################### FONCTIONS ####################################################################
##################################################################################################################################################################


FastQCresults <- function(input) {

  # Modified code from the GitHub of Tony Breyal : tonybreyal/Blog-Reference-Functions/R/htmlToText/htmlToText.R

  require(RCurl) # require packages
  require(XML) # require packages

  file <- function(input) {
    char.vec <- readLines(input, warn = FALSE)
    return(paste(char.vec, collapse = ""))
  }

  convert_html_to_text <- function(html) {
    doc <- htmlParse(html, asText = TRUE)
    text <- xpathSApply(doc, "//text()[not(ancestor::script)][not(ancestor::style)][not(ancestor::noscript)][not(ancestor::form)]", xmlValue)
    return(text)
  }

f <- file(input)
a <- convert_html_to_text(f)

results <- c(a[4], ";", a[3],";", a[28], ";",a[30],";", a[32],";", a[34])
# (a[4] = "Name", a[3] = "Date", a[28] = "Total Sequences", a[30] = "Sequences flagged as poor quality",  a[32] = "Sequence length",  a[34] = "%GC")
results <- data.frame(t(unlist(results)))
names(results) <- NULL
rownames(results) <- NULL
return(results)

}

# Catch all information for all FastQC files generated by Galaxy and print it in a file ("FastQCanalysis.csv")
FastQCresultsForAll <- function(id){
  #### By Pierre-Louis Stenger (Pierrelouis.stenger@gmail.com) ####
  id <- list.files(pattern = ".html") # Find all the names of files with html extension
  sink("FastQCanalysis.csv", append=TRUE) # Create csv file to put the informations
  cat("Name",";", "Date", ";","Total Sequences", ";","Sequences flagged as poor quality", ";","Sequence length", ";","%GC", ";","\n") # Give a header of this file
  for(i in id){
    a <- FastQCresults(i)
    print(a)
  }
  sink()
  sink()
  sink()

}

New_FastQCresults <- function(input) {

  # Modified code from the GitHub of Tony Breyal : tonybreyal/Blog-Reference-Functions/R/htmlToText/htmlToText.R

  require(RCurl) # require packages
  require(XML) # require packages

  file <- function(input) {
    char.vec <- readLines(input, warn = FALSE)
    return(paste(char.vec, collapse = ""))
  }

  convert_html_to_text <- function(html) {
    doc <- htmlParse(html, asText = TRUE)
    text <- xpathSApply(doc, "//text()[not(ancestor::script)][not(ancestor::style)][not(ancestor::noscript)][not(ancestor::form)]", xmlValue)
    return(text)
  }

f <- file(input)
a <- convert_html_to_text(f)

results <- c(a[4], ";", a[3],";", a[27], ";",a[29],";", a[31],";", a[33])
# (a[4] = "Name", a[3] = "Date", a[28] = "Total Sequences", a[30] = "Sequences flagged as poor quality",  a[32] = "Sequence length",  a[34] = "%GC")
results <- data.frame(t(unlist(results)))
names(results) <- NULL
rownames(results) <- NULL
return(results)

}


# Catch all information for all FastQC files generated by Galaxy and print it in a file ("FastQCanalysis.txt")
New_FastQCresultsForAll <- function(id){
  #### By Pierre-Louis Stenger (Pierrelouis.stenger@gmail.com) ####
  id <- list.files(pattern = ".html") # Find all the names of files with html extension
  sink("FastQCanalysis.txt", append=TRUE) # Create csv file to put the informations
  cat("Name",";", "Date", ";","Total Sequences", ";","Sequences flagged as poor quality", ";","Sequence length", ";","%GC", ";","\n") # Give a header of this file
  for(i in id){
    a <- New_FastQCresults(i)
    print(a)
  }
  sink()
  sink()
  sink()

}


# # Test
# FastQCresultsForAllDesk <- function(id){
#   id <- list.files(pattern = ".html")
#   for(i in id){
#     a <- FastQCresults(i)
#     b <- print(a)
#   }
#   return(b)
# }


# Collect the adapter content graphs of all FastQC reports created by Galaxy into one PDF
adapter <- function(id){
  #### By Pierre-Louis Stenger (Pierrelouis.stenger@gmail.com) ####
  require(grDevices) # require packages
  require(imager) # require packages
  pdf("Adapters content.pdf", height=10,width=10) # create the PDF
  id <- list.files(pattern = "adapter_content.png", recursive = TRUE) # catch all the images call "adapter_content.png" in all folders
  nb <- length(list.files(pattern = "adapter_content.png", recursive = TRUE)) # find the number of all the images call "adapter_content.png" in all folders

  for(i in id){ # create the loop --> for one image (i) in all images (id)...
    a <- load.image(i) # ... find this image and load it...
    print(plot(a, main = i)) # ... print it in the pdf
  }
   dev.off() # close the pdf
}

# Collect the per base quality graphs of all FastQC reports created by Galaxy into one PDF
bquality <- function(id){
  #### By Pierre-Louis Stenger (Pierrelouis.stenger@gmail.com) ####
  require(grDevices) # require packages
  require(imager) # require packages
  pdf("Per base quality.pdf", height=10,width=10) # create the PDF
  id <- list.files(pattern = "per_base_quality.png", recursive = TRUE) # catch all the images call "adapter_content.png" in all folders
  nb <- length(list.files(pattern = "per_base_quality.png", recursive = TRUE)) # find the number of all the images call "adapter_content.png" in all folders

  for(i in id){ # create the loop --> for one image (i) in all images (id)...
    a <- load.image(i) # ... find this image and load it...
    print(plot(a, main = i)) # ... print it in the pdf
  }
   dev.off() # close the pdf
}



##################################################################################################################################################################
##################################################################################################################################################################

# Catch the information for one FastQC file generated by Galaxy
# FastQCresults("HI.4112.002.D710---D501.X18_R2_fastqc.html")

# Catch all information for all FastQC files generated by Galaxy and print it in a file ("FastQCanalysis.csv")
# FastQCresultsForAll()

# To obtain the adapter content graphs of all FastQC reports created by Galaxy in one pdf file ("Adapters content.pdf")
# adapter()

# To obtain the per base quality graphs of all FastQC reports created by Galaxy in one pdf file ("Per base quality.pdf")
# bquality()

##################################################################################################################################################################
##################################################################################################################################################################
# PLStenger/ReadFastQC documentation built on April 3, 2021, 4:53 a.m.